## Executed March 2, 2024, 
## using following software:
## (1) 
### R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
### Copyright (C) 2023 The R Foundation for Statistical Computing
### Platform: x86_64-pc-linux-gnu (64-bit)
## (2) 
### RStudio 2023.06.0+421 "Mountain Hydrangea" Release
### (583b465ecc45e60ee9de085148cd2f9741cc5214, 2023-06-06) 
### for Ubuntu Focal Mozilla/5.0 (X11; Linux x86_64) 
### AppleWebKit/537.36 (KHTML, like Gecko) rstudio/2023.06.0+421 
### Chrome/110.0.5481.208 Electron/23.3.0 Safari/537.36
## (3) 
### Ubuntu 22.04.4 LTS (64 bit)

## Clean Up Space
rm(list=ls())

## Set working directory to the location of this file (through RStudio)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Required Packages
library(stringr)
library(quanteda)

## Function extracting difference-in-difference estimates
exteff <- function(m, vcov, tr, mod, modval) {
  mc <- coeftest(m, vcov. = vcov)
  ## Exporting marginal effects and SEs
  out <- data.frame(modval = modval, est = NA, se = NA)
  if (all(!modval%in%"subtract")) {
    for(i in 1:length(modval)) {
      if (length(which(rownames(mc)==paste0(mod,modval[i])))==0) {
        out$est[i] <- mc[which(rownames(mc)==tr),1]
        out$se[i] <- mc[which(rownames(mc)==tr),2]
      } else {
        out$est[i] <- mc[which(rownames(mc)==tr),1] + 
          mc[which(rownames(mc)==paste0(tr,":",mod,modval[i])),1]
        out$se[i] <- sqrt(
          mc[which(rownames(mc)==tr),2]^2 + 
            1^2 * 
            mc[which(rownames(mc)==paste0(tr,":",mod,modval[i])),2]^2 + 
            2 * 1 * 
            vcov[which(rownames(mc)==tr),
                 which(rownames(mc)==paste0(tr,":",mod,modval[i]))]
        )
      }
    }
    ## Exporting difference in coefficients and SEs  
  } else if (length(modval)==1 & all(modval=="subtract")) {
    out <- data.frame(modval = tr, est = NA, se = NA)
    for(i in 1:length(tr)) {
      ## Reference Category vs mod
      if (tr[i]=="REFERENCE") {
        out$est[i] <- (-1)*mc[which(rownames(mc)==mod),1]
        out$se[i] <- mc[which(rownames(mc)==mod),2]
        ## Just create zero  
      } else if (tr[i]=="SAME") {
        out$est[i] <- 0
        out$se[i] <- NA
        ## All else
      } else {
        out$est[i] <- mc[which(rownames(mc)==tr[i]),1] + 
          (-1)*mc[which(rownames(mc)==mod),1]
        out$se[i] <- sqrt(
          mc[which(rownames(mc)==tr[i]),2]^2 + 
            (-1)^2 * mc[which(rownames(mc)==mod),2]^2 + 
            2 * (-1) * vcov[which(rownames(mc)==tr[i]),
                            which(rownames(mc)==mod)]
        )
      }
    }
  }
  out$lci90 <- out$est - out$se*qnorm(0.95)
  out$uci90 <- out$est + out$se*qnorm(0.95)
  out$lci95 <- out$est - out$se*qnorm(0.975)
  out$uci95 <- out$est + out$se*qnorm(0.975)
  return(out)
}

## Read in Data (Drop years prior to 1992)
load("ungd_files.RData")
d <- subset(ungd_files, Year >=1992)
rm(ungd_files)

## Character vectors that identify state groups
CAstates <- c("KAZ","UZB","TKM","KGZ","TJK")
NonCAstates <- c("MDA","ARM","AZE",
                 "EST","LVA","LTU")
Balticstates <- c("EST","LVA","LTU")
Caucasusstates <- c("MDA","ARM","AZE")
Belarus <- c("BLR")
Georgia <- c("GEO")
Ukraine <- c("UKR")
Russia <- c("RUS")
### Setting colors attached to each group
clrscale <- c("#d95f02","#1b9e77","#666666",
              "#7570b3","#66a61e",
              "#e7298a","#a6761d","#e6ab02")

## Check the existence of Post-Soviet States
table(d$Year[d$Country%in%CAstates],
      d$Country[d$Country%in%CAstates])
table(d$Year[d$Country%in%NonCAstates],
      d$Country[d$Country%in%NonCAstates])
table(d$Year[d$Country%in%c(Belarus,Ukraine,Georgia)],
      d$Country[d$Country%in%c(Belarus,Ukraine,Georgia)])

## Create Character Vector of Speech
tvec_tgt <- d$text %>% str_squish
## typo fix
tvec_tgt <- gsub("soverign","sovereign",tvec_tgt)

## Generate Sentence Level Corpus
cs_tgt <- corpus(tvec_tgt, docnames=d$doc_id) 
head(cs_tgt)

## Manually setting words to compound
multiword <- c("United Nations", 
               "General Assembly",
               "right to exist",
               "territorial integrity",
               "Territorial integrity")

## Generate Tokens
tk_tgt <- tokens(cs_tgt, remove_punct = TRUE) %>% 
  tokens_compound(pattern=phrase(multiword))

tk_tgt[[which(names(tk_tgt)=="FIN_2022")]]

## Keywords in Context (Examples Shown in Table 1)
samplekeys <- function(key, cy, n=1, seed=50, w=5) {
  set.seed(seed)
  tmp <- kwic(tk_tgt[which(names(tk_tgt)==cy)], key, window=w)
  tmp[sample(nrow(tmp),n),]
}
samplekeys("sovereign","FIN_2022")
samplekeys("sovereignty","LTU_2014")
samplekeys("territorial_integrity","GEO_2008")
samplekeys("autonomy","UKR_2015", w=6)
samplekeys("independent","MDA_1997", w=10)
samplekeys("independence","ARM_2022",n=2)[2]
samplekeys("self-determination","LVA_2005")
samplekeys("interference","TKM_1993",n=2)[2]
samplekeys("non-intervention","KGZ_2015",w=7)
samplekeys("right_to_exist","LVA_2022")

## Split by sentences (modify data)
cs_tgt <- cs_tgt %>% 
  corpus_reshape("sentences")
head(cs_tgt)
tk_tgt <- tokens(cs_tgt, #stem = TRUE, 
                 remove_punct = TRUE) %>% 
  tokens_compound(pattern=phrase(multiword))

## Document Feature Matrix for Specified Words
keywords <- c("sovereign","sovereignty","sovereignly",
              "territorial_integrity",
              "autonomy","autonomous", 
              "independence","independent",
              "self-determination",
              "self-governance","self-government", 
              "interfere", "interference", "right_to_exist",
              "non-intervention")
dfm_tgt_keys <- tk_tgt %>% 
  tokens_select(keywords) %>% 
  dfm() %>%
  convert(to="data.frame")
dfm_tgt_keys$key_all <- (rowSums(dfm_tgt_keys[,-1])>0)*1
dfm_tgt_keys$sentence_id <- dfm_tgt_keys$doc_id
dfm_tgt_keys$doc_id <- gsub("\\..*$", "", dfm_tgt_keys$sentence_id)

## Generate Grand Dataset
dsov <- merge(d, dfm_tgt_keys)

## Weighting Variable (Inverse of Speech Length)
dsov$w <- NA
cy <- paste(dsov$Country,dsov$Year)
for(c in unique(cy)) {
  tmp <- length(which(cy==c))
  dsov$w[which(cy==c)] <- (1/tmp)*100
  cat(paste0(c," "))
}
rm(cy,tmp)
hist(dsov$w)

##########################
## Descriptive Analysis ##
##########################

## Trend for CA states
dsovca <- subset(dsov, Country %in% CAstates) 
dsovca_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovca[,c("key_all","w")], dsovca$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovca$Year))
dsovca_annual$Region <- "Central Asia (CA)"

## Trend for non-CA post-Soviet states
dsovps <- subset(dsov, Country %in% c(NonCAstates)) #
dsovps_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovps[,c("key_all","w")], dsovps$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovps$Year))
dsovps_annual$Region <- "Non-CA post-Soviet\n(excl. Russia, Belarus, Ukraine, Georgia)"

## Trend for non-post-Soviet states
dsovelse <- subset(dsov, !Country %in% c(CAstates,NonCAstates,Ukraine,Georgia,Belarus,Russia)) 
dsovelse_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovelse[,c("key_all","w")], dsovelse$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovelse$Year))
dsovelse_annual$Region <- "Non-Post-Soviet"
dsovelse_annualy <- dsovelse_annual
dsovelse_annualy$Region <- "Non-Post-Soviet"

## Binding all data into one dataset
dsov_annual_cb <- rbind(dsovca_annual, dsovps_annual, 
                        dsovelse_annual)
dsov_annual_cb$Region <- factor(dsov_annual_cb$Region,
                                   levels=unique(dsov_annual_cb$Region))

## Plot trends in sovereignty frame appearances
## (THIS IS FIGURE 1)
require(ggplot2)
ggplot(dsov_annual_cb, aes(x=Year,y=x)) + 
  geom_vline(aes(xintercept=2008), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2014), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2022), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_point(aes(color=Region,shape=Region), size=2.5) + 
  geom_line(aes(color=Region,linetype=Region)) + 
  scale_x_continuous(breaks=c(1992,2000,2008,2010,2014,2020,2022)) +
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_linetype_manual(name="Region", values=c(1,2,3)) + 
  scale_color_manual(name="Region", values=clrscale[1:3]) + 
  labs(y = "Probability that sovereignty-related words appear\nin a given speech sentene",
       x = "Year of UNGA (General Debate is in September)",
       caption = 
         paste0("Note 1: Central Asian states include Kazakhstan, Kyrgyzstan, Tajikistan, Turkmenistan, and Uzbeistan.\n",
                "Note 2: Non-CA post-Soviet states include Moldova, Armenia, Azerbaijan, Estonia, Latvia, and Lithuania.\n",
                "Note 3: Probability is inversely weighted by speech length so that each speech is treated equally.")) + 
  theme_classic() + 
  theme(legend.position = c(0.3,0.8),
        legend.background = element_rect(color="black"),
        plot.title = element_text(hjust=0.5),
        plot.caption = element_text(hjust=0, size=10),
        plot.caption.position = "plot")

ggsave("trend_analysis_main_v3.pdf", 
       width = 9, height = 6)

## Separating Baltic and Non-Baltic Non-CA Post-Soviet states
## as well as adding Belarus

## Trend for Baltic States
dsovbal <- subset(dsov, Country %in% c(Balticstates)) #
dsovbal_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovbal[,c("key_all","w")], dsovbal$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovbal$Year))
dsovbal_annual$Region <- "Baltic (Estonia, Latvia, Lithuania)"

## Trend for non-Baltic States
dsovnonbal <- subset(dsov, Country %in% c(Caucasusstates)) #
dsovnonbal_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovnonbal[,c("key_all","w")], dsovnonbal$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovnonbal$Year))
dsovnonbal_annual$Region <- "Caucasus/Eastern Europe (Armenia, Azerbaijan, Moldova)"

## Trend for Belarus
dsovblr <- subset(dsov, Country %in% c(Belarus)) #
dsovblr_annual <- 
  data.frame(x = as.numeric(
    tapply(dsovblr[,c("key_all","w")], dsovblr$Year,
           function(d) weighted.mean(d$key_all,d$w))),
    Year = unique(dsovblr$Year))
dsovblr_annual$Region <- "Belarus"

## Binding all data into one dataset
dsov_annual_cbs <- rbind(dsovbal_annual, dsovnonbal_annual, 
                         dsovblr_annual, dsovelse_annual)
dsov_annual_cbs$Region <- factor(dsov_annual_cbs$Region,
                                 levels=unique(dsov_annual_cbs$Region))

## Plot trends in sovereignty frame appearances
## (THIS IS FIGURE A4 in ONLINE APPENDIX C)
require(ggplot2)
ggplot(dsov_annual_cbs, aes(x=Year,y=x)) + 
  geom_vline(aes(xintercept=2008), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2014), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_vline(aes(xintercept=2022), color="gray30", linetype=2, alpha=0.5, linewidth=0.4) + 
  geom_point(aes(color=Region,shape=Region), size=2.5) + 
  geom_line(aes(color=Region,linetype=Region)) + 
  scale_x_continuous(breaks=c(1992,2000,2008,2010,2014,2020,2022)) +  
  scale_shape_manual(name="Region", values=c(2,17,18,15)) + 
  scale_linetype_manual(name="Region", values=c(2,2,2,3)) + 
  scale_color_manual(name="Region", values=clrscale[c(4,5,6,3)]) + 
  labs(y = "Probability that sovereignty-related words appear\nin a given speech sentene",
       x = "Year of UNGA (General Debate is in September)",
       caption = 
         paste0("Note: Probability is inversely weighted by speech length so that each speech is treated equally.")) + 
  theme_classic() + 
  theme(legend.position = c(0.4,0.85),
        legend.background = element_rect(color="black"),
        plot.title = element_text(hjust=0.5),
        plot.caption = element_text(hjust=0, size=10),
        plot.caption.position = "plot")

ggsave("trend_analysis_bal_nonbal_blr_v3.pdf", 
       width = 9, height = 6)

########################################
## Difference in difference estimator ##
########################################

## Country Group Variable (excl. Ukraine, Georgia, Belarus, Russia)
dsov$cgr <- NA
dsov$cgr[which(dsov$Country%in%CAstates)] <- "Central Asia (CA)"
dsov$cgr[which(dsov$Country%in%NonCAstates)] <- "Non-CA post-Soviet"
dsov$cgr[which(!dsov$Country%in%c(CAstates,NonCAstates,Ukraine,Georgia,Belarus,Russia))] <- 
  "Non-post-Soviet"
dsov$cgrx <- factor(dsov$cgr, levels = c("Non-post-Soviet", 
                                        "Central Asia (CA)", 
                                        "Non-CA post-Soviet"))
## Russian adventurism year variable
dsov$tryear <- ifelse(dsov$Year %in% c(2008, 2014, 2022), 1, 0)


##############################
## Estimations (Main Model) ##
##############################

require(sandwich)
require(lmtest)

## Main Model (1997-2022)
ma <- lm(key_all ~ tryear + cgrx + tryear*cgrx, 
          data=subset(dsov, !Year %in% c(1992:1996)),
          weights = w)
mac_vcov <- vcovCL(ma, subset(dsov, !Year %in% c(1992:1996))$Country)
mac <- coeftest(ma, vcov.=mac_vcov)
## Main Model (2006-7 vs 2008)
m08 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2006,
                                         2007,2008)), 
           weights = w)
m08c_vcov <- vcovCL(m08, subset(dsov, Year %in% c(2006,
                                                  2007,2008))$Country)
m08c <- coeftest(m08, vcov.=m08c_vcov)
## Main Model (2012-13 vs 2014)
m14 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2012,
                                         2013,2014)),
           weights = w)
m14c_vcov <- vcovCL(m14, subset(dsov, Year %in% c(2012,
  2013,2014))$Country)
m14c <- coeftest(m14, vcov.=m14c_vcov)
## Main Model (2020-21 vs 2022)
m22 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
           data=subset(dsov, Year %in% c(2020,
                                         2021,2022)),
           weights = w)
m22c_vcov <- vcovCL(m22, subset(dsov, Year %in% c(2020,
                                                  2021,2022))$Country)
m22c <- coeftest(m22, vcov.=m22c_vcov)

## Presenting results in a table
## (THIS IS TABLE A1 in ONLINE APPENDIX A)
require(texreg)
screenreg(list(ma,m08,m14,m22), digits = 3,
          override.se = list(mac[,2],m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(mac[,4],m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
          custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
## Export the table in tex format
texreg(list(ma,m08,m14,m22), digits = 3,
          override.se = list(mac[,2],m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(mac[,4],m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
       custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_main_v3.tex")

## Extracting Marginal Effects
## for 1997-2022 model
maceff <- exteff(ma, mac_vcov, 
       "tryear", "cgrx", 
       levels(dsov$cgrx))
maceff$m <- "All years\n(1997~2022)"
## for 2006-7 vs 2008 model
m08eff <- exteff(m08, m08c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m08eff$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
## for 2012-13 vs 2014 model
m14eff <- exteff(m14, m14c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m14eff$m <- "Crimea Annexation\n(2012-13 vs 2014)"
## for 2020-21 vs 2022 model
m22eff <- exteff(m22, m22c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m22eff$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Binding all results 
malleff <- rbind(maceff,m08eff,m14eff,m22eff)
malleff$modval <- factor(malleff$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malleff$m <- factor(malleff$m, levels = unique(malleff$m))
malleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 1997-2022 model
macdif <- exteff(ma, mac_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
macdif$modval <- maceff$modval
macdif$m <- "All years\n(1997~2022)"
## for 2006-7 vs 2008 model
m08dif <- exteff(m08, m08c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m08dif$modval <- m08eff$modval
m08dif$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
## for 2012-13 vs 2014 model
m14dif <- exteff(m14, m14c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m14dif$modval <- m14eff$modval
m14dif$m <- "Crimea Annexation\n(2012-13 vs 2014)"
## for 2020-21 vs 2022 model
m22dif <- exteff(m22, m22c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m22dif$modval <- m22eff$modval
m22dif$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Binding all results 
malldif <- rbind(macdif,m08dif,m14dif,m22dif)
malldif$modval <- factor(malldif$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malldif$m <- factor(malldif$m, levels = unique(malldif$m))
malldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(malldif[,1],round(malldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# malleffdif <- rbind(malleff,malldif)
# malleffdif$type <- as.factor(malleffdif$type)

## Plot difference-in-difference estimates
## (THIS IS FIGURE 2)
require(ggplot2)
ggplot(malleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,2,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: Non-CA post-Soviet states exclude Russia, Belarus, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_main_v3.2.pdf", 
       width = 8, height = 5)

##################################
## Estimations (One Year Model) ##
##################################

ma <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
         data=dsov,
         weights = w)
mac_vcov <- vcovCL(ma, dsov$Country)
mac <- coeftest(ma, vcov.=mac_vcov)
m08 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2007,2008)), 
          weights = w)
m08c_vcov <- vcovCL(m08, subset(dsov, Year %in% c(2007,2008))$Country)
m08c <- coeftest(m08, vcov.=m08c_vcov)
m14 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2013,2014)),
          weights = w)
m14c_vcov <- vcovCL(m14, subset(dsov, Year %in% c(2013,2014))$Country)
m14c <- coeftest(m14, vcov.=m14c_vcov)
m22 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2021,2022)),
          weights = w)
m22c_vcov <- vcovCL(m22, subset(dsov, Year %in% c(2021,2022))$Country)
m22c <- coeftest(m22, vcov.=m22c_vcov)

## (THIS IS TABLE A4 in ONLINE APPENDIX D.1)
require(texreg)
screenreg(list(ma,m08,m14,m22), digits = 3,
          override.se = list(mac[,2],m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(mac[,4],m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
          custom.model.names = c("All (92-22)","2007-08","2013-14","2021-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(ma,m08,m14,m22), digits = 3,
       override.se = list(mac[,2],m08c[,2],
                          m14c[,2],m22c[,2]),
       override.pvalues = list(mac[,4],m08c[,4],
                               m14c[,4],m22c[,4]),
       custom.coef.names = c("(Intercept)",
                             "Russian adventurism",
                             "Central Asia (CA)",
                             "Non-CA post-Soviet",
                             "Adventurism * CA",
                             "Adventurism * Non-CA"),
       custom.model.names = c("All (92-22)","2007-08","2013-14","2021-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_1yr_v3.tex")

maceff <- exteff(ma, mac_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
maceff$m <- "All years\n(1992~2022)"
m08eff <- exteff(m08, m08c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m08eff$m <- "Russo-Georgian War\n(2007 vs 2008)"
m14eff <- exteff(m14, m14c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m14eff$m <- "Crimea Annexation\n(2013 vs 2014)"
m22eff <- exteff(m22, m22c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m22eff$m <- "Russo-Ukrainian War\n(2021 vs 2022)"

malleff <- rbind(maceff,m08eff,m14eff,m22eff)
malleff$modval <- factor(malleff$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malleff$m <- factor(malleff$m, levels = unique(malleff$m))
malleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 1997-2022 model
macdif <- exteff(ma, mac_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
macdif$modval <- maceff$modval
macdif$m <- "All years\n(1992~2022)"
## for 2006-7 vs 2008 model
m08dif <- exteff(m08, m08c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m08dif$modval <- m08eff$modval
m08dif$m <- "Russo-Georgian War\n(2007 vs 2008)"
## for 2012-13 vs 2014 model
m14dif <- exteff(m14, m14c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m14dif$modval <- m14eff$modval
m14dif$m <- "Crimea Annexation\n(2013 vs 2014)"
## for 2020-21 vs 2022 model
m22dif <- exteff(m22, m22c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m22dif$modval <- m22eff$modval
m22dif$m <- "Russo-Ukrainian War\n(2021 vs 2022)"

## Binding all results 
malldif <- rbind(macdif,m08dif,m14dif,m22dif)
malldif$modval <- factor(malldif$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malldif$m <- factor(malldif$m, levels = unique(malldif$m))
malldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(malldif[,1],round(malldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# malleffdif <- rbind(malleff,malldif)
# malleffdif$type <- as.factor(malleffdif$type)

## (THIS IS FIGURE A6 in ONLINE APPENDIX D.1)
require(ggplot2)
ggplot(malleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                 linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                 linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,2,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: Non-CA post-Soviet states exclude Russia, Belarus, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_1yr_v3.2.pdf", 
       width = 8, height = 5)

#####################################
## Estimations (Three Years Model) ##
#####################################

m08 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2005:2007,2008)), 
          weights = w)
m08c_vcov <- vcovCL(m08, subset(dsov, Year %in% c(2005:2007,2008))$Country)
m08c <- coeftest(m08, vcov.=m08c_vcov)
m14 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2011:2013,2014)),
          weights = w)
m14c_vcov <- vcovCL(m14, subset(dsov, Year %in% c(2011:2013,2014))$Country)
m14c <- coeftest(m14, vcov.=m14c_vcov)
m22 <- lm(key_all ~ tryear + cgrx + tryear*cgrx,  
          data=subset(dsov, Year %in% c(2019:2021,2022)),
          weights = w)
m22c_vcov <- vcovCL(m22, subset(dsov, Year %in% c(2019:2021,2022))$Country)
m22c <- coeftest(m22, vcov.=m22c_vcov)

## (THIS IS TABLE A5 in ONLINE APPENDIX D.1)
require(texreg)
screenreg(list(m08,m14,m22), digits = 3,
          override.se = list(m08c[,2],
                             m14c[,2],m22c[,2]),
          override.pvalues = list(m08c[,4],
                                  m14c[,4],m22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Non-CA post-Soviet",
                                "Adventurism * CA",
                                "Adventurism * Non-CA"),
          custom.model.names = c("2005-08","2011-14","2019-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(m08,m14,m22), digits = 3,
       override.se = list(m08c[,2],
                          m14c[,2],m22c[,2]),
       override.pvalues = list(m08c[,4],
                               m14c[,4],m22c[,4]),
       custom.coef.names = c("(Intercept)",
                             "Russian adventurism",
                             "Central Asia (CA)",
                             "Non-CA post-Soviet",
                             "Adventurism * CA",
                             "Adventurism * Non-CA"),
       custom.model.names = c("2005-08","2011-14","2019-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_3yrs_v3.tex")

m08eff <- exteff(m08, m08c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m08eff$m <- "Russo-Georgian War\n(2005-07 vs 2008)"
m14eff <- exteff(m14, m14c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m14eff$m <- "Crimea Annexation\n(2011-13 vs 2014)"
m22eff <- exteff(m22, m22c_vcov, 
                 "tryear", "cgrx", 
                 levels(dsov$cgrx))
m22eff$m <- "Russo-Ukrainian War\n(2019-21 vs 2022)"

malleff <- rbind(m08eff,m14eff,m22eff)
malleff$modval <- factor(malleff$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malleff$m <- factor(malleff$m, levels = unique(malleff$m))
malleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 2006-7 vs 2008 model
m08dif <- exteff(m08, m08c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m08dif$modval <- m08eff$modval
m08dif$m <- "Russo-Georgian War\n(2005-07 vs 2008)"
## for 2012-13 vs 2014 model
m14dif <- exteff(m14, m14c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m14dif$modval <- m14eff$modval
m14dif$m <- "Crimea Annexation\n(2011-13 vs 2014)"
## for 2020-21 vs 2022 model
m22dif <- exteff(m22, m22c_vcov, 
                 c("REFERENCE","SAME","tryear:cgrxNon-CA post-Soviet"),
                 "tryear:cgrxCentral Asia (CA)",
                 "subtract")
m22dif$modval <- m22eff$modval
m22dif$m <- "Russo-Ukrainian War\n(2019-21 vs 2022)"

## Binding all results 
malldif <- rbind(m08dif,m14dif,m22dif)
malldif$modval <- factor(malldif$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Non-CA post-Soviet",
                                    "Central Asia (CA)"))
malldif$m <- factor(malldif$m, levels = unique(malldif$m))
malldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(malldif[,1],round(malldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# malleffdif <- rbind(malleff,malldif)
# malleffdif$type <- as.factor(malleffdif$type)

## (THIS IS FIGURE A7 in ONLINE APPENDIX D.2)
require(ggplot2)
ggplot(malleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                 linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                 linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,17,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,2,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: Non-CA post-Soviet states exclude Russia, Belarus, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_3yrs_v3.2.pdf", 
       width = 8, height = 5)

###################################################
## Difference in difference estimator in Subsets ##
###################################################

## Country Group Variable (excl. Ukraine, Georgia, Belarus, Russia)
dsov$cgrs <- NA
dsov$cgrs[which(dsov$Country%in%CAstates)] <- "Central Asia (CA)"
dsov$cgrs[which(dsov$Country%in%Balticstates)] <- "Baltic post-Soviet"
dsov$cgrs[which(dsov$Country%in%Caucasusstates)] <- "Caucasus/EE post-Soviet"
dsov$cgrs[which(dsov$Country%in%Belarus)] <- "Belarus"
dsov$cgrs[which(!dsov$Country%in%c(CAstates,NonCAstates,Ukraine,Georgia,Belarus,Russia))] <- 
  "Non-post-Soviet"
dsov$cgrsx <- factor(dsov$cgrs, levels = c("Non-post-Soviet", 
                                         "Central Asia (CA)", 
                                         "Baltic post-Soviet",
                                         "Caucasus/EE post-Soviet",
                                         "Belarus"))
## Russian adventurism years variable
dsov$tryear <- ifelse(dsov$Year %in% c(2008, 2014, 2022), 1, 0)


#########################################
## Estimations (Main Model) in Subsets ##
#########################################

require(sandwich)
require(lmtest)

msa <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
         data=subset(dsov, !Year %in% c(1992:1996)),
         weights = w)
msac_vcov <- vcovCL(msa, subset(dsov, !Year %in% c(1992:1996))$Country)
msac <- coeftest(msa, vcov.=msac_vcov)
ms08 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2006,
                                        2007,2008)), 
          weights = w)
ms08c_vcov <- vcovCL(ms08, subset(dsov, Year %in% c(2006,
                                                  2007,2008))$Country)
ms08c <- coeftest(ms08, vcov.=ms08c_vcov)
ms14 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2012,
                                        2013,2014)),
          weights = w)
ms14c_vcov <- vcovCL(ms14, subset(dsov, Year %in% c(2012,
                                                  2013,2014))$Country)
ms14c <- coeftest(ms14, vcov.=ms14c_vcov)
ms22 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2020,
                                        2021,2022)),
          weights = w)
ms22c_vcov <- vcovCL(ms22, subset(dsov, Year %in% c(2020,
                                                  2021,2022))$Country)
ms22c <- coeftest(ms22, vcov.=ms22c_vcov)

## (THIS IS TABLE A3 in ONLINE APPENDIX C)
require(texreg)
screenreg(list(msa,ms08,ms14,ms22), digits = 3,
          override.se = list(msac[,2],ms08c[,2],
                             ms14c[,2],ms22c[,2]),
          override.pvalues = list(msac[,4],ms08c[,4],
                                  ms14c[,4],ms22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Baltic post-Soviet",
                                "Caucasus/EE post-Soviet",
                                "Belarus",
                                "Adventurism * CA",
                                "Adventurism * Baltic",
                                "Adventurism * Caucasus/EE",
                                "Adventurism * Belarus"),
          custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(msa,ms08,ms14,ms22), digits = 3,
       override.se = list(msac[,2],ms08c[,2],
                          ms14c[,2],ms22c[,2]),
       override.pvalues = list(msac[,4],ms08c[,4],
                               ms14c[,4],ms22c[,4]),
       custom.coef.names = c("(Intercept)",
                             "Russian adventurism",
                             "Central Asia (CA)",
                             "Baltic post-Soviet",
                             "Caucasus/EE post-Soviet",
                             "Belarus",
                             "Adventurism * CA",
                             "Adventurism * Baltic",
                             "Adventurism * Caucasus/EE",
                             "Adventurism * Belarus"),
       custom.model.names = c("All (97-22)","2006-08","2012-14","2020-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_main_subsets_v3.tex")

msaceff <- exteff(msa, msac_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
msaceff$m <- "All years\n(1997~2022)"
ms08eff <- exteff(ms08, ms08c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms08eff$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
ms14eff <- exteff(ms14, ms14c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms14eff$m <- "Crimea Annexation\n(2012-13 vs 2014)"
ms22eff <- exteff(ms22, ms22c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms22eff$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Country cluster SE does not work for Belarus, so use standard SE
for(i in 3:7) {
  msaceff[5,i] <- exteff(msa,vcov(msa),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms08eff[5,i] <- exteff(ms08,vcov(ms08),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms14eff[5,i] <- exteff(ms14,vcov(ms14),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms22eff[5,i] <- exteff(ms22,vcov(ms22),"tryear","cgrsx","Belarus")[,i]
}

msalleff <- rbind(msaceff,ms08eff,ms14eff,ms22eff)
msalleff$modval <- factor(msalleff$modval, 
                          levels = c("Non-post-Soviet", 
                                     "Belarus",
                                     "Caucasus/EE post-Soviet",
                                     "Baltic post-Soviet",
                                     "Central Asia (CA)"))
msalleff$m <- factor(msalleff$m, levels = unique(msalleff$m))
msalleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 1997-2022 model
msacdif <- exteff(msa, msac_vcov, 
                 c("REFERENCE","SAME",
                   "tryear:cgrsxBaltic post-Soviet",
                   "tryear:cgrsxCaucasus/EE post-Soviet",
                   "tryear:cgrsxBelarus"),
                 "tryear:cgrsxCentral Asia (CA)",
                 "subtract")
msacdif$modval <- msaceff$modval
msacdif$m <- "All years\n(1997~2022)"
## for 2006-7 vs 2008 model
ms08dif <- exteff(ms08, ms08c_vcov, 
                 c("REFERENCE","SAME",
                   "tryear:cgrsxBaltic post-Soviet",
                   "tryear:cgrsxCaucasus/EE post-Soviet",
                   "tryear:cgrsxBelarus"),
                 "tryear:cgrsxCentral Asia (CA)",
                 "subtract")
ms08dif$modval <- ms08eff$modval
ms08dif$m <- "Russo-Georgian War\n(2006-07 vs 2008)"
## for 2012-13 vs 2014 model
ms14dif <- exteff(ms14, ms14c_vcov, 
                 c("REFERENCE","SAME",
                   "tryear:cgrsxBaltic post-Soviet",
                   "tryear:cgrsxCaucasus/EE post-Soviet",
                   "tryear:cgrsxBelarus"),
                 "tryear:cgrsxCentral Asia (CA)",
                 "subtract")
ms14dif$modval <- ms14eff$modval
ms14dif$m <- "Crimea Annexation\n(2012-13 vs 2014)"
## for 2020-21 vs 2022 model
ms22dif <- exteff(ms22, ms22c_vcov, 
                 c("REFERENCE","SAME",
                   "tryear:cgrsxBaltic post-Soviet",
                   "tryear:cgrsxCaucasus/EE post-Soviet",
                   "tryear:cgrsxBelarus"),
                 "tryear:cgrsxCentral Asia (CA)",
                 "subtract")
ms22dif$modval <- ms22eff$modval
ms22dif$m <- "Russo-Ukrainian War\n(2020-21 vs 2022)"

## Binding all results 
msalldif <- rbind(msacdif,ms08dif,ms14dif,ms22dif)
msalldif$modval <- factor(msalldif$modval, 
                         levels = c("Non-post-Soviet", 
                                    "Belarus",
                                    "Caucasus/EE post-Soviet",
                                    "Baltic post-Soviet",
                                    "Central Asia (CA)"))
msalldif$m <- factor(msalldif$m, levels = unique(msalldif$m))
msalldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(msalldif[,1],round(msalldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# msalleffdif <- rbind(msalleff,msalldif)
# msalleffdif$type <- as.factor(msalleffdif$type)

## (THIS IS FIGURE A5 in ONLINE APPENDIX C)
require(ggplot2)
ggplot(msalleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                 linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                 linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,2,17,18,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,4,5,6,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: The analysis excludes Russia, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_main_subsets_v3.2.pdf", 
       width = 8, height = 5)

#############################################
## Estimations (One Year Model) in Subsets ##
#############################################

msa <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
         data=dsov,
         weights = w)
msac_vcov <- vcovCL(msa, dsov$Country)
msac <- coeftest(msa, vcov.=msac_vcov)
ms08 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2007,2008)), 
          weights = w)
ms08c_vcov <- vcovCL(ms08, subset(dsov, Year %in% c(2007,2008))$Country)
ms08c <- coeftest(ms08, vcov.=ms08c_vcov)
ms14 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2013,2014)),
          weights = w)
ms14c_vcov <- vcovCL(ms14, subset(dsov, Year %in% c(2013,2014))$Country)
ms14c <- coeftest(ms14, vcov.=ms14c_vcov)
ms22 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
          data=subset(dsov, Year %in% c(2021,2022)),
          weights = w)
ms22c_vcov <- vcovCL(ms22, subset(dsov, Year %in% c(2021,2022))$Country)
ms22c <- coeftest(ms22, vcov.=ms22c_vcov)

## (THIS IS TABLE A6 in ONLINE APPENDIX D.2)
require(texreg)
screenreg(list(msa,ms08,ms14,ms22), digits = 3,
          override.se = list(msac[,2],ms08c[,2],
                             ms14c[,2],ms22c[,2]),
          override.pvalues = list(msac[,4],ms08c[,4],
                                  ms14c[,4],ms22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Baltic post-Soviet",
                                "Caucasus/EE post-Soviet",
                                "Belarus",
                                "Adventurism * CA",
                                "Adventurism * Baltic",
                                "Adventurism * Caucasus/EE",
                                "Adventurism * Belarus"),
          custom.model.names = c("All (92-22)","2007-08","2013-14","2021-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(msa,ms08,ms14,ms22), digits = 3,
       override.se = list(msac[,2],ms08c[,2],
                          ms14c[,2],ms22c[,2]),
       override.pvalues = list(msac[,4],ms08c[,4],
                               ms14c[,4],ms22c[,4]),
       custom.coef.names = c("(Intercept)",
                             "Russian adventurism",
                             "Central Asia (CA)",
                             "Baltic post-Soviet",
                             "Caucasus/EE post-Soviet",
                             "Belarus",
                             "Adventurism * CA",
                             "Adventurism * Baltic",
                             "Adventurism * Caucasus/EE",
                             "Adventurism * Belarus"),
       custom.model.names = c("All (92-22)","2007-08","2013-14","2021-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_1yr_subsets_v3.tex")

msaceff <- exteff(msa, msac_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
msaceff$m <- "All years\n(1992~2022)"
ms08eff <- exteff(ms08, ms08c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms08eff$m <- "Russo-Georgian War\n(2007 vs 2008)"
ms14eff <- exteff(ms14, ms14c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms14eff$m <- "Crimea Annexation\n(2013 vs 2014)"
ms22eff <- exteff(ms22, ms22c_vcov, 
                 "tryear", "cgrsx", 
                 levels(dsov$cgrsx))
ms22eff$m <- "Russo-Ukrainian War\n(2021 vs 2022)"

## Country cluster SE does not work for Belarus, so use standard SE
for(i in 3:7) {
  msaceff[5,i] <- exteff(msa,vcov(msa),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms08eff[5,i] <- exteff(ms08,vcov(ms08),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms14eff[5,i] <- exteff(ms14,vcov(ms14),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms22eff[5,i] <- exteff(ms22,vcov(ms22),"tryear","cgrsx","Belarus")[,i]
}

msalleff <- rbind(msaceff,ms08eff,ms14eff,ms22eff)
msalleff$modval <- factor(msalleff$modval, 
                          levels = c("Non-post-Soviet", 
                                     "Belarus",
                                     # "Ukraine/Georgia",
                                     "Caucasus/EE post-Soviet",
                                     "Baltic post-Soviet",
                                     "Central Asia (CA)"))
msalleff$m <- factor(msalleff$m, levels = unique(msalleff$m))
msalleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 1997-2022 model
msacdif <- exteff(msa, msac_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
msacdif$modval <- msaceff$modval
msacdif$m <- "All years\n(1992~2022)"
## for 2006-7 vs 2008 model
ms08dif <- exteff(ms08, ms08c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms08dif$modval <- ms08eff$modval
ms08dif$m <- "Russo-Georgian War\n(2007 vs 2008)"
## for 2012-13 vs 2014 model
ms14dif <- exteff(ms14, ms14c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms14dif$modval <- ms14eff$modval
ms14dif$m <- "Crimea Annexation\n(2013 vs 2014)"
## for 2020-21 vs 2022 model
ms22dif <- exteff(ms22, ms22c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms22dif$modval <- ms22eff$modval
ms22dif$m <- "Russo-Ukrainian War\n(2021 vs 2022)"

## Binding all results 
msalldif <- rbind(msacdif,ms08dif,ms14dif,ms22dif)
msalldif$modval <- factor(msalldif$modval, 
                          levels = c("Non-post-Soviet", 
                                     "Belarus",
                                     "Caucasus/EE post-Soviet",
                                     "Baltic post-Soviet",
                                     "Central Asia (CA)"))
msalldif$m <- factor(msalldif$m, levels = unique(msalldif$m))
msalldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(msalldif[,1],round(msalldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# msalleffdif <- rbind(msalleff,msalldif)
# msalleffdif$type <- as.factor(msalleffdif$type)

## (THIS IS FIGURE A8 in ONLINE APPENDIX D.2)
require(ggplot2)
ggplot(msalleff, aes(x=est, y=modval, 
                    shape=factor(modval,levels=rev(levels(modval))),
                    color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                 linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                 linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,2,17,18,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,4,5,6,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: The analysis excludes Russia, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_1yr_subsets_v3.2.pdf", 
       width = 8, height = 5)

################################################
## Estimations (Three Years Model) in Subsets ##
################################################

ms08 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
           data=subset(dsov, Year %in% c(2005:2007,2008)), 
           weights = w)
ms08c_vcov <- vcovCL(ms08, subset(dsov, Year %in% c(2005:2007,2008))$Country)
ms08c <- coeftest(ms08, vcov.=ms08c_vcov)
ms14 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
           data=subset(dsov, Year %in% c(2011:2013,2014)),
           weights = w)
ms14c_vcov <- vcovCL(ms14, subset(dsov, Year %in% c(2011:2013,2014))$Country)
ms14c <- coeftest(ms14, vcov.=ms14c_vcov)
ms22 <- lm(key_all ~ tryear + cgrsx + tryear*cgrsx,  
           data=subset(dsov, Year %in% c(2019:2021,2022)),
           weights = w)
ms22c_vcov <- vcovCL(ms22, subset(dsov, Year %in% c(2019:2021,2022))$Country)
ms22c <- coeftest(ms22, vcov.=ms22c_vcov)

## (THIS IS TABLE A7 in ONLINE APPENDIX D.2)
require(texreg)
screenreg(list(ms08,ms14,ms22), digits = 3,
          override.se = list(ms08c[,2],
                             ms14c[,2],ms22c[,2]),
          override.pvalues = list(ms08c[,4],
                                  ms14c[,4],ms22c[,4]),
          custom.coef.names = c("(Intercept)",
                                "Russian adventurism",
                                "Central Asia (CA)",
                                "Baltic post-Soviet",
                                "Caucasus/EE post-Soviet",
                                "Belarus",
                                "Adventurism * CA",
                                "Adventurism * Baltic",
                                "Adventurism * Caucasus/EE",
                                "Adventurism * Belarus"),
          custom.model.names = c("2005-08","2011-14","2019-22"),
          stars = c(0.1,0.05,0.01,0.001), symbol = "+")
texreg(list(ms08,ms14,ms22), digits = 3,
       override.se = list(ms08c[,2],
                          ms14c[,2],ms22c[,2]),
       override.pvalues = list(ms08c[,4],
                               ms14c[,4],ms22c[,4]),
       custom.coef.names = c("(Intercept)",
                             "Russian adventurism",
                             "Central Asia (CA)",
                             "Baltic post-Soviet",
                             "Caucasus/EE post-Soviet",
                             "Belarus",
                             "Adventurism * CA",
                             "Adventurism * Baltic",
                             "Adventurism * Caucasus/EE",
                             "Adventurism * Belarus"),
       custom.model.names = c("2005-08","2011-14","2019-22"),
       stars = c(0.1,0.05,0.01,0.001), symbol = "+",
       custom.note = "%stars. Robust standard errors clustered by countries in parentheses.",
       booktabs = TRUE, dcolumn = TRUE, use.packages = FALSE, table = FALSE,
       file = "did_table_3yrs_subsets_v3.tex")

ms08eff <- exteff(ms08, ms08c_vcov, 
                  "tryear", "cgrsx", 
                  levels(dsov$cgrsx))
ms08eff$m <- "Russo-Georgian War\n(2005-07 vs 2008)"
ms14eff <- exteff(ms14, ms14c_vcov, 
                  "tryear", "cgrsx", 
                  levels(dsov$cgrsx))
ms14eff$m <- "Crimea Annexation\n(2011-13 vs 2014)"
ms22eff <- exteff(ms22, ms22c_vcov, 
                  "tryear", "cgrsx", 
                  levels(dsov$cgrsx))
ms22eff$m <- "Russo-Ukrainian War\n(2019-21 vs 2022)"

## Country cluster SE does not work for Belarus, so use standard SE
for(i in 3:7) {
  msaceff[5,i] <- exteff(msa,vcov(msa),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms08eff[5,i] <- exteff(ms08,vcov(ms08),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms14eff[5,i] <- exteff(ms14,vcov(ms14),"tryear","cgrsx","Belarus")[,i]
}
for(i in 3:7) {
  ms22eff[5,i] <- exteff(ms22,vcov(ms22),"tryear","cgrsx","Belarus")[,i]
}

msalleff <- rbind(ms08eff,ms14eff,ms22eff)
msalleff$modval <- factor(msalleff$modval, 
                          levels = c("Non-post-Soviet", 
                                     "Belarus",
                                     "Caucasus/EE post-Soviet",
                                     "Baltic post-Soviet",
                                     "Central Asia (CA)"))
msalleff$m <- factor(msalleff$m, levels = unique(msalleff$m))
msalleff$type <- "Average effect of\nRussian adventurism"

## Difference from CA states
## for 2006-7 vs 2008 model
ms08dif <- exteff(ms08, ms08c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms08dif$modval <- ms08eff$modval
ms08dif$m <- "Russo-Georgian War\n(2005-07 vs 2008)"
## for 2012-13 vs 2014 model
ms14dif <- exteff(ms14, ms14c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms14dif$modval <- ms14eff$modval
ms14dif$m <- "Crimea Annexation\n(2011-13 vs 2014)"
## for 2020-21 vs 2022 model
ms22dif <- exteff(ms22, ms22c_vcov, 
                  c("REFERENCE","SAME",
                    "tryear:cgrsxBaltic post-Soviet",
                    "tryear:cgrsxCaucasus/EE post-Soviet",
                    "tryear:cgrsxBelarus"),
                  "tryear:cgrsxCentral Asia (CA)",
                  "subtract")
ms22dif$modval <- ms22eff$modval
ms22dif$m <- "Russo-Ukrainian War\n(2019-21 vs 2022)"

## Binding all results 
msalldif <- rbind(ms08dif,ms14dif,ms22dif)
msalldif$modval <- factor(msalldif$modval, 
                          levels = c("Non-post-Soviet", 
                                     "Belarus",
                                     "Caucasus/EE post-Soviet",
                                     "Baltic post-Soviet",
                                     "Central Asia (CA)"))
msalldif$m <- factor(msalldif$m, levels = unique(msalldif$m))
msalldif$type <- "Difference\nfrom CA states"

## Display Confidence Intervals
cat("Confidence intervals for difference from CA states")
cbind(msalldif[,1],round(msalldif[,c(2,4,5,6,7)],3))

# ## Binding Effects & Differences
# msalleffdif <- rbind(msalleff,msalldif)
# msalleffdif$type <- as.factor(msalleffdif$type)

## (THIS IS FIGURE A9 in ONLINE APPENDIX D.2)
require(ggplot2)
ggplot(msalleff, aes(x=est, y=modval, 
                     shape=factor(modval,levels=rev(levels(modval))),
                     color=factor(modval,levels=rev(levels(modval))))) + 
  geom_vline(aes(xintercept=0), linetype = 2) +
  geom_errorbarh(aes(xmin=lci95,xmax=uci95), 
                 linewidth=0.5, height=0, alpha=1)+ 
  geom_errorbarh(aes(xmin=lci90,xmax=uci90),
                 linewidth=1.5, height=0, alpha=0.8)+
  geom_point(size = 2.5) + 
  scale_shape_manual(name="Region", values=c(16,2,17,18,15)) + 
  scale_color_manual(name="Region", values=clrscale[c(1,4,5,6,3)]) + 
  facet_grid(m~.) + 
  labs(y = NULL, 
       x = paste0("Average effect of Russian adventurism (2008, 2014, 2022) on\n",
                  "the probability of ",
                  "sovereignty frame appearance in a given sentence"),
       caption = paste0("Note 1: Estimated with OLS regression, inversely weighted by speech length so that each speech is treated equally.\n",
                        "Note 2: Thin and thick bars respectively represent 95% and 90% confidence intervals using robust standard errors clustered by countries.\n",
                        "Note 3: The analysis excludes Russia, Ukraine, and Georgia.")) +
  theme_bw() + 
  theme(strip.text.y = element_text(angle=0),
        strip.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0),
        plot.caption.position = "plot",
        legend.position = "none")

ggsave("did_analysis_3yrs_subsets_v3.2.pdf", 
       width = 8, height = 5)
